/******************************************************************************
* This program calcualtes the VOL[s, t], CAR[s, t] 
* It include total vol
* It incllude dgtw-adjusted CAR
* It also calcualtes volume and volatility persistence
******************************************************************************/

/******************************************************************************
                             Get Permno-rdq1 pairs
******************************************************************************/
* Download trading dates from crsp;
%let wrds = wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;

rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';

data tdays;
	set crsp.dsi;
	keep date;
	format date date9.;
	label date=' ';
	run;

proc sort data = tdays nodupkey; by date; run; 
data tdays; 
	set tdays; 
	number = _n_; run; 
proc download data = tdays out = rep.tdays; run; 

endrsubmit; 

* Get permno-rdq1 paris;
proc sql;
  create table permno_rdq1
  as select a.*, b.number
  from (select distinct lpermno as permno, rdq1
  from elog.earnings) as a,
       rep.tdays as b
  where a.rdq1 = b.date
  order by permno, rdq1;
  quit;

** Check how many trading days are between the current rdq1 and the next rdq1;
proc sort data = permno_rdq1; by permno descending rdq1; run;
data permno_rdq1;
  set permno_rdq1;
  by permno descending rdq1;
  days = lag(number) - number;
  if first.permno then days = . ;
  run;
proc freq data = permno_rdq1;
  tables days;
  run;
proc sort data = permno_rdq1; by permno  rdq1; run;
data permno_rdq1;
  set permno_rdq1;
  by permno rdq1;
  days_pre = number-lag(number);
  if first.permno then days_pre = . ;
  run;

/******************************************************************************
                               Daily Total Volume
******************************************************************************/
* I will consistent in variable naming: vol are unadjusted volume, vom are
* split adjusted and logged volume;

*** Total Daily Trades from CRSP;
* need crsp.dsf downloaded as crsp.crsp_d;
libname crsp "E:\CRSP";

  data crsp_d;
    set crsp.crsp_d;
    if shrcd in (10,11);
    /*  if exchcd in (1 2 3);*/
    P = abs(prc)/cfacpr;

    *adjust NASDAQ volume according to Gao and Ritter (2010);
    vol_adj = vol;
    if exchcd=3 and date < mdy(02,01, 2001) then
      vol_adj = vol/2;
    if exchcd=3 and mdy(02,1, 2001)<= date <= mdy(12,31, 2001) then
      vol_adj = vol/1.8;
    if exchcd=3 and year(date) in (2002 2003) then
      vol_adj = vol/1.6;
    if exchcd=3 and year(date) >=2004 then
      vol_adj = vol/1.0;
    VOM = log(1+vol_adj*cfacshr);
    * in the version before 10/20/2018, the above one-line code was
      DVOM = log(1+VOM*P) and it was wrong;
    DVOM = log(1+vol_adj*abs(prc));
    TSO = shrout*cfacshr*1000;

    turn = vol_adj/(shrout*1000);

    logturn = log(turn);

    if TSO <= 0 then TSO = .;
    if VOM < 0 then VOM = .;
    if DVOM < 0 then DVOM = .;

    ME = P*TSO;

    label P = "Price at Period End, Adjusted";
    label TSO = "Total Shares Outstanding, Adjusted";
    label VOM = "Trading Volume Daily (logged), Adjusted";
    label DVOM = "Trading Volume Daily in Dollars, (logged) Adjusted";
    label turn = "Turnover";
    label ME = "Market Equity";
    format ret percentn8.4 P DVOM dollar12.3 TSO VOM comma12. ;

    keep permno date vol_adj VOM DVOM TSO turn logturn ME prc ret cfacshr
    shrout exchcd;
    run;

*** Calculate DGTW returns;

  proc sql;

    create table crsp_d
    as select distinct a.*, b.date as formdate "Formation Date", b.dgtw_port
    from crsp_d as a left join rep.dgtw_assignment_wrds as b
    on a.permno=b.permno and b.date < a.date<=intnx('month', b.date,12,'e');

    create table crsp_d
    as select a.*,b.DGTW_VWRET format percentn8.4 "DGTW Benchmark Return",
      (a.ret-b.DGTW_VWRET) as abret "DGTW Excess Return" format percentn8.4
    from crsp_d as a left join rep.dgtw_vwret as b
    on a.dgtw_port=b.dgtw_port and a.date=b.date;
  quit;



/******************************************************************************
                      Fill in [-41, 61] with Daily Trades
******************************************************************************/

*** Fill in with respect to permno-rdq1 [-41, 61] window;

  * Keep everything in line with volume methodology;

  proc sql;
    create table time_series
    as select
        a.*,
        b.rdq1,
        b.number as rdqnumber,
        b.days,
        b.days_pre
    from (select a.*, b.number as datenumber from crsp_d as a,
          rep.tdays_2018 as b where a.date = b.date)
          as a, permno_rdq1 as b
    where a.permno = b.permno and
          -min(41,days_pre)<=a.datenumber-b.number<=max(70, days);
    quit;

*** Get abnormal volume respectively;

  data time_series;
    set time_series;
    rnumber = datenumber-rdqnumber;
    run;

  proc sql;
    create table pre_window
    as select distinct permno, rdq1,
      mean(vom) as vom_pre,
      count(date) as countdays
    from time_series
    /* in case the previous rdq1 is less than 41 days ago, use [previous
     rdq1+2, current rdq1-11]*/
    where -min(41,days_pre-2)<=rnumber<=-11
    group by permno, rdq1
    having calculated countdays = -11+min(41, days_pre-2)+1
    order by permno, rdq1;
    quit;

  proc sql;
    create table vomab
    as select a.*,
      a.vom - b.vom_pre as vom_ab_pre,
    from time_series as a, pre_window as b
    where a.permno = b.permno and a.rdq1 = b.rdq1
    order by a.permno, a.rdq1, a.date;
    quit;

/******************************************************************************
                                  Get Averages
******************************************************************************/

%macro pre_vol(input = , output= , var = );
  * delete ffret: dataset;
  proc datasets library=work noprint;
     delete ffret:;
  run;
  %let start = %str(-20, -20, -20, -10, -10, -10, -1, 0, 2, 2, 2,   2,  2,  2, 2, 2);
  %let end   = %str(-3,   -5, -11,  -1,  -2,  -3,  1, 1, 3, 5, 10, 20, 30, 40, 50, 61);

  %let nwords=%sysfunc(countw(&start));
  %do i=1 %to &nwords;
    %let s = %qscan(&start,&i,%str(,));
    %let e = %scan(&end, &i, %str(,));
    %let d = %eval(&e-&s+1);

    %let s0 = %sysfunc(abs(&s));
    %let e0 = %sysfunc(abs(&e));
    %if &e<0 %then %let e0 =_&e0;
    %if &s<0 %then %let s0 =_&s0;


    %if &var = abret %then %do;
      proc sql;
        create table ffret&i
          as select distinct permno, rdq1,
             exp(sum(log(1+ret)))-exp(sum(log(1+dgtw_vwret))) as abcret&s0.&e0._dgtw_r,
             count(rdq1) as countdays

          from &input
          where &s<=rnumber<= &e and days -1 >= &e
          group by permno, rdq1
          having calculated countdays = &d
          order by permno, rdq1;
        quit;
    %end;
    %else %do;
      proc sql;
        create table ffret&i
          as select distinct permno, rdq1,
             mean(&var._ab_pre) as &var._ab&s0.&e0._pre_r,
             count(rdq1) as countdays

          from &input
          where &s<=rnumber<= &e and days -1 >= &e
          group by permno, rdq1
          having calculated countdays = &d
          order by permno, rdq1;
        quit;
    %end;

  %end;

  * from t+1 until next rdq1-1;

  %if &var = abret %then %do;
    proc sql;
    * from t+1 until next rdq1-5, excluding pre-annoucement leakage of next rdq1;
      create table ffret0_2
        as select distinct permno, rdq1,
         exp(sum(log(1+ret)))-exp(sum(log(1+dgtw_vwret))) as abcret295_dgtw_r,
         count(rdq1) as countdays


        from &input
        where 2<=rnumber<= days-4
        group by permno, rdq1
        having calculated countdays = days-5
        order by permno, rdq1;
      quit;
  %end;
  %else %do;
    proc sql;
    * from t+1 until next rdq1-5, excluding pre-annoucement leakage of next rdq1;
      create table ffret0_2
        as select distinct permno, rdq1,
         mean(&var._ab_pre) as &var._ab295_pre_r,
         count(rdq1) as countdays
        from &input
        where 2<=rnumber<= days-4
        group by permno, rdq1
        having calculated countdays = days-5
        order by permno, rdq1;
      quit;

  %end;

  data &output;
    merge permno_rdq1 ffret:;
    by permno rdq1;
    drop countdays;
    run;

%mend;

%pre_vol(input = vomab, output = vom, var = vom);
%pre_vol(input = vomab, output = abret, var = abret);

/******************************************************************************
                       Volatility and Volume Persistence
******************************************************************************/

* until next rdq1-5;
%macro persis(end = , lag = , out = );
data time_series4;
  set vomab;
  if vom_ab_pre ne .;
  if ret ne .;
  absret = abs(ret);
  keep rdq1 permno vom_ab_pre 
       absret ret rnumber days;
  if rnumber>=0 and rnumber<=&end; 
  run; 
* To make sure that the program runs fluently, count distinct returns;
proc sql;
  create table ret_count
  as select distinct permno, rdq1, ret, mean(ret) as meanret
  from time_series4
  group by permno, rdq1;

  create table time_series5
  as select distinct a.*, b.ret_count
  from time_series4 as a, 
       (select distinct permno, rdq1, count(ret) 
        as ret_count from ret_count group by permno,
        rdq1) as b
  where a.permno = b.permno and a.rdq1 = b.rdq1 and b.ret_count>=40
  order by permno, rdq1, rnumber;
  quit; 

option nonotes; 
proc iml;
use time_series5;
read all var{permno rdq1} into firm;
read all var{vom_ab_pre} into vom_ab_pre;
read all var{absret} into absret;
close;
b = uniqueby(firm, 1:2);  
lala = j(nrow(b),5, 99);         /* 3. Allocate vector to hold results */
lala[,4:5] = firm[b,1:2];
b = b // (nrow(firm)+1);     /* trick: append (n+1) to end of b */
do i = 1 to nrow(b)-1;    /* 4. For each level... */
   idx = b[i]:(b[i+1]-1); /* 5. Find observations in level */
   * demean all series;
   vom_ab_pre_dm = vom_ab_pre[idx]-mean(vom_ab_pre[idx]);
   absret_dm = absret[idx]-mean(absret[idx]);
   if (b[i+1]-b[i])>=40 then do;
   call farmafit(d1,phi,ma,sigma,vom_ab_pre_dm); 
   call farmafit(d2,phi,ma,sigma,absret_dm);

   lala[i,1] = d1; 
   lala[i,2] = d2;
   end; 
end;


create mydat_vom_ab from lala;
append from lala;
close mydat_vom_ab;

quit;
option notes;
data &out; set mydat_vom_ab; 
  rename col3 = permno; 
  rename col4 = rdq1;
  rename col1 = d_vom_ab_pre_&lag; 
  rename col2 = d_ret0;
  format col4 date9.;
  run;
proc sort data = &out; by permno rdq1; run; 
* end; 
%mend;
%persis(end = days-5, lag = 5, out= out2);
data persis;
  merge permno_rdq1 out2;
  by permno rdq1;
  run;

/******************************************************************************
                                Combine and Save
******************************************************************************/

data rep.car_vol;
  merge vom abret persis;
  by permno rdq1;
  run;
